import os, sys, inspect
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
sys.path.append('..')
from prediction_denoise import prediction
from glob import glob
import librosa
import IPython.display as ipd
from signal_utils import audio_files_to_numpy, numpy_audio_to_matrix_spectrogram
from data_plot import plot_spectrogram, plot_3_spectograms
import matplotlib.pyplot as plt
Read noisy voices to clean them up.
# Collect the file names (without directory) of the noisy recordings.
# glob() does not guarantee any ordering, and later cells pair
# noisy_voices_list[i] with pred_voices_list[i] by position, so the
# result is sorted to make that pairing deterministic.
noisy_voices_list = sorted(
    os.path.basename(voice)
    for voice in glob('..\\data\\validation\\noisy_voice\\*')
)
print(noisy_voices_list)
['karol_birds.wav', 'karol_klawiatura.wav', 'karol_myszka.wav', 'marcin_klawiatura.wav']
def predict(audio_input_prediction, audio_output_prediction, sr=8000, name_model='model_unet'):
    """Denoise one noisy recording with the pre-trained model.

    Thin wrapper that fills in this notebook's fixed directories and
    framing / FFT settings and forwards everything to ``prediction``.

    Args:
        audio_input_prediction: File name of the noisy sound to denoise.
            It is wrapped in a one-element list before being forwarded.
        audio_output_prediction: File name of the denoised sound to save.
        sr: Sample rate used to read the audio.
        name_model: Name of the pre-trained model.

    Returns:
        None. Whatever ``prediction`` returns is not propagated.
    """
    # Where to find the pre-trained weights, where the noisy sounds are
    # read from, and where the denoised sound is saved.
    weights_dir = '..\\data\\weights'
    noisy_dir = '..\\data\\validation\\noisy_voice'
    save_dir = '..\\data\\validation\\save_prediction\\'

    # Minimum duration of audio files to consider.
    shortest_clip = 1.0
    # Frame length and hop length used to cut the sound files into frames.
    frame_size = 8064
    frame_hop = 8064
    # Number of FFT points and FFT hop length for spectrogram computation.
    fft_points = 255
    fft_hop = 63

    prediction(
        weights_dir, name_model, noisy_dir, save_dir,
        [audio_input_prediction], audio_output_prediction,
        sr, shortest_clip, frame_size, frame_hop, fft_points, fft_hop,
    )
# Denoise every listed recording; each result is saved under the
# original file name prefixed with 'pred_'.
for voice in noisy_voices_list:
    predict(voice, f'pred_{voice}')
Loaded model from disk 128 (22, 128, 128) (22, 128, 128) 8064 63 Loaded model from disk 128 WARNING:tensorflow:5 out of the last 5 calls to <function Model.make_predict_function.<locals>.predict_function at 0x000001C943E80160> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. (22, 128, 128) (22, 128, 128) 8064 63 Loaded model from disk 128 WARNING:tensorflow:6 out of the last 6 calls to <function Model.make_predict_function.<locals>.predict_function at 0x000001C943E1C820> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. (18, 128, 128) (18, 128, 128) 8064 63 Loaded model from disk 128 (5, 128, 128) (5, 128, 128) 8064 63
# Collect the file names (without directory) of the saved predictions.
# glob() does not guarantee any ordering, and later cells pair
# pred_voices_list[i] with noisy_voices_list[i] by position, so the
# result is sorted to make that pairing deterministic.
pred_voices_list = sorted(
    os.path.basename(voice)
    for voice in glob('..\\data\\validation\\save_prediction\\*')
)
print(pred_voices_list)
['pred_karol_birds.wav', 'pred_karol_klawiatura.wav', 'pred_karol_myszka.wav', 'pred_marcin_klawiatura.wav']
# Load entry 3 of both lists at 8 kHz and play the noisy recording
# followed by its prediction. real_noisy, real_pred and sr stay
# available for the cells that follow.
real_noisy, sr = librosa.load(
    os.path.join('..\\data\\validation\\noisy_voice', noisy_voices_list[3]),
    sr=8000,
)
real_pred, sr = librosa.load(
    os.path.join('..\\data\\validation\\save_prediction', pred_voices_list[3]),
    sr=8000,
)
for caption, clip in (('Noisy voice', real_noisy), ('Predicted voice', real_pred)):
    print(caption)
    ipd.display(ipd.Audio(clip, rate=8000))
Noisy voice
Predicted voice
# Compute and plot the spectrograms of the currently loaded noisy /
# predicted pair.
#
# reshape(1, -1) lays each loaded signal out as a single row without
# hard-coding its sample count; the previous literal lengths (42057 and
# 40320) only fit one specific pair of files and raised on any other.
#
# NOTE(review): the FFT sizes 1335 / 1279 are unchanged from the original
# and look specific to these two clips - confirm before reusing this cell
# with other recordings.
real_noisy_db, real_noisy_pha = numpy_audio_to_matrix_spectrogram(
    real_noisy.reshape(1, -1), 1335 // 2 + 1, 1335, 63
)
real_pred_db, real_pred_pha = numpy_audio_to_matrix_spectrogram(
    real_pred.reshape(1, -1), 1279 // 2 + 1, 1279, 63
)

plot_spectrogram(
    real_noisy_db[0, :, :],
    sr,  # sample rate
    63,  # hop_length_fft
)
plot_spectrogram(
    real_pred_db[0, :, :],
    sr,  # sample rate
    63,  # hop_length_fft
)
# Load entry 1 of both lists at 8 kHz and play the noisy recording
# followed by its prediction.
real_noisy, sr = librosa.load(
    os.path.join('..\\data\\validation\\noisy_voice', noisy_voices_list[1]),
    sr=8000,
)
real_pred, sr = librosa.load(
    os.path.join('..\\data\\validation\\save_prediction', pred_voices_list[1]),
    sr=8000,
)
for caption, clip in (('Noisy voice', real_noisy), ('Predicted voice', real_pred)):
    print(caption)
    ipd.display(ipd.Audio(clip, rate=8000))
Noisy voice
Predicted voice
# Load entry 2 of both lists at 8 kHz and play the noisy recording
# followed by its prediction.
real_noisy, sr = librosa.load(
    os.path.join('..\\data\\validation\\noisy_voice', noisy_voices_list[2]),
    sr=8000,
)
real_pred, sr = librosa.load(
    os.path.join('..\\data\\validation\\save_prediction', pred_voices_list[2]),
    sr=8000,
)
for caption, clip in (('Noisy voice', real_noisy), ('Predicted voice', real_pred)):
    print(caption)
    ipd.display(ipd.Audio(clip, rate=8000))
Noisy voice
Predicted voice
# Load entry 0 of both lists at 8 kHz and play the noisy recording
# followed by its prediction.
real_noisy, sr = librosa.load(
    os.path.join('..\\data\\validation\\noisy_voice', noisy_voices_list[0]),
    sr=8000,
)
real_pred, sr = librosa.load(
    os.path.join('..\\data\\validation\\save_prediction', pred_voices_list[0]),
    sr=8000,
)
for caption, clip in (('Noisy voice', real_noisy), ('Predicted voice', real_pred)):
    print(caption)
    ipd.display(ipd.Audio(clip, rate=8000))
Noisy voice
Predicted voice